USE DataMiningDW
GO

-- List the fuzzy-grouping output rows whose cleaned name differs from the
-- original name. Rows where either value is NULL are not returned, because
-- the <> comparison is then UNKNOWN.
SELECT fg.*
FROM dbo.FuzzyGrouped AS fg
WHERE fg.Nazwa <> fg.Nazwa_clean;
GO

/*
UPDATE dbo.FuzzyGrouping 
SET Nazwa = Nazwa_clean
FROM dbo.FuzzyGrouping AS S JOIN dbo.FuzzyGrouped AS FG
ON S.ID = FG._key_in
WHERE S.ID IN (3,12,18,7,10);
*/

USE AdventureWorksDW
GO

-- One-hot encode Occupation: every column of CustomersHistory plus one 0/1
-- indicator column per occupation value. A NULL or unlisted occupation
-- yields 0 in all five indicators.
SELECT
    ch.*,
    CASE ch.Occupation WHEN 'Professional'   THEN 1 ELSE 0 END AS Professional,
    CASE ch.Occupation WHEN 'Clerical'       THEN 1 ELSE 0 END AS Clerical,
    CASE ch.Occupation WHEN 'Management'     THEN 1 ELSE 0 END AS Management,
    CASE ch.Occupation WHEN 'Skilled Manual' THEN 1 ELSE 0 END AS SkilledManual,
    CASE ch.Occupation WHEN 'Manual'         THEN 1 ELSE 0 END AS Manual
FROM CustomersHistory AS ch;


 -- Share of customers in each occupation, computed as the mean of the 0/1
 -- indicator for that occupation. Multiplying the indicator by 1.00 makes
 -- AVG operate on decimals; averaging the bare integers would truncate.
 -- Column order: Professional, Clerical, Management, Skilled Manual, Manual.
 SELECT
     AVG((CASE ch.Occupation WHEN 'Professional'   THEN 1 ELSE 0 END) * 1.00),
     AVG((CASE ch.Occupation WHEN 'Clerical'       THEN 1 ELSE 0 END) * 1.00),
     AVG((CASE ch.Occupation WHEN 'Management'     THEN 1 ELSE 0 END) * 1.00),
     AVG((CASE ch.Occupation WHEN 'Skilled Manual' THEN 1 ELSE 0 END) * 1.00),
     AVG((CASE ch.Occupation WHEN 'Manual'         THEN 1 ELSE 0 END) * 1.00)
 FROM CustomersHistory AS ch;
GO

-- Map Education to an ordinal so that a higher number means a higher level
-- of attainment. 'Partial College' must rank below 'Bachelors'; the two were
-- previously swapped, which broke the ordering. Values not listed here
-- (including NULL) produce NULL.
SELECT
    ch.*,
    CASE ch.Education
        WHEN 'Partial High School' THEN 1
        WHEN 'High School'         THEN 2
        WHEN 'Partial College'     THEN 3
        WHEN 'Bachelors'           THEN 4
        WHEN 'Graduate Degree'     THEN 5
    END AS EducationOrdered
FROM dbo.CustomersHistory AS ch;
GO

-- Derive the day-of-week number of the first purchase.
-- NOTE: the number returned by DATEPART(dw, ...) depends on the session's
-- SET DATEFIRST setting, so the same date can map to different values in
-- different sessions.
SELECT
    ch.*,
    DATEPART(dw, ch.DateFirstPurchase) AS [WeekDay]
FROM dbo.CustomersHistory AS ch;
GO

-- Derive the customer's age in completed years.
-- DATEDIFF(YEAR, ...) only counts calendar-year boundaries crossed, so on its
-- own it overstates the age by one until the birthday has occurred in the
-- current year. The CASE subtracts that year when this year's birthday is
-- still in the future.
SELECT
    ch.*,
    DATEDIFF(YEAR, ch.BirthDate, GETDATE())
    - CASE
          WHEN DATEADD(YEAR, DATEDIFF(YEAR, ch.BirthDate, GETDATE()), ch.BirthDate) > GETDATE()
          THEN 1
          ELSE 0
      END AS Age
FROM dbo.CustomersHistory AS ch;
GO

/*
WITH Outliers AS
(SELECT TOP 1 PERCENT YearlyIncome
FROM dbo.CustomersHistory
ORDER BY YearlyIncome DESC)
DELETE FROM Outliers;
*/

/*
WITH Outliers AS
(SELECT TOP 1 percent YearlyIncome
FROM dbo.CustomersHistory
ORDER BY YearlyIncome DESC)
UPDATE Outliers
SET YearlyIncome=150000;
*/

-- Inspect the value range of TotalChildren (smallest, then largest value).
SELECT
    MIN(ch.TotalChildren),
    MAX(ch.TotalChildren)
FROM dbo.CustomersHistory AS ch;
GO

-- Min-max (range) normalization of TotalAmount into the interval [0, 1].
-- The bounds are held in FLOAT variables: a bare DECIMAL is DECIMAL(18,0) in
-- SQL Server and would round the minimum and maximum to whole numbers,
-- skewing every normalized value.
DECLARE @minTotalAmount AS FLOAT;
DECLARE @maxTotalAmount AS FLOAT;

SELECT
    @minTotalAmount = MIN(TotalAmount),
    @maxTotalAmount = MAX(TotalAmount)
FROM dbo.CustomersHistory;

-- NULLIF guards against division by zero when every row has the same
-- TotalAmount; the normalized value is NULL in that case.
SELECT
    TotalAmount,
    (TotalAmount - @minTotalAmount)
        / NULLIF(@maxTotalAmount - @minTotalAmount, 0) AS LinearTotalAmount
FROM dbo.CustomersHistory;
GO

-- Z-score standardization of TotalAmount: (value - mean) / standard deviation.
-- STDEV returns FLOAT, and a bare DECIMAL is DECIMAL(18,0) in SQL Server, so
-- the previous declarations rounded both the mean and the deviation to whole
-- numbers. FLOAT keeps their fractional parts.
DECLARE @avgTotalAmount AS FLOAT;
DECLARE @devTotalAmount AS FLOAT;

SELECT
    @avgTotalAmount = AVG(TotalAmount),
    @devTotalAmount = STDEV(TotalAmount)
FROM dbo.CustomersHistory;

-- NULLIF guards against division by zero when all values are identical
-- (standard deviation 0); the standardized value is NULL in that case.
SELECT
    TotalAmount,
    (TotalAmount - @avgTotalAmount)
        / NULLIF(@devTotalAmount, 0) AS StandardizedTotalAmount
FROM dbo.CustomersHistory;
GO

-- Logistic (sigmoid) transformation of TotalChildren: 1 / (1 + e^(-x)).
-- EXP() replaces POWER() on a hand-typed, truncated value of e; it works in
-- FLOAT rather than the fixed-scale decimal that POWER returns for a
-- numeric base.
SELECT
    ch.TotalChildren,
    1.0 / (1.0 + EXP(-1.0 * ch.TotalChildren)) AS LogisticTotalChildren
FROM dbo.CustomersHistory AS ch;
GO

-- Discretize YearlyIncome into six named bands. The WHEN branches are
-- evaluated top-down, so each band covers the range from the previous
-- threshold up to (but excluding) its own. A NULL income matches no WHEN
-- branch and therefore lands in the ELSE band.
SELECT
    CASE
        WHEN ch.YearlyIncome <  36667 THEN 'Very Low'
        WHEN ch.YearlyIncome <  63333 THEN 'Low'
        WHEN ch.YearlyIncome <  90000 THEN 'Average'
        WHEN ch.YearlyIncome < 116667 THEN 'High'
        WHEN ch.YearlyIncome < 143333 THEN 'Very High'
        ELSE 'Enormous'
    END AS YearlyIncomeDiscretized
FROM CustomersHistory AS ch;
GO

-- Frequency of each YearlyIncome band. The band label is computed once per
-- row in the CROSS APPLY and then used as the grouping key.
SELECT
    band.YearlyIncomeDiscretized,
    COUNT(*) AS Freq
FROM CustomersHistory AS ch
CROSS APPLY
(
    SELECT
        CASE
            WHEN ch.YearlyIncome <  36667 THEN 'Very Low'
            WHEN ch.YearlyIncome <  63333 THEN 'Low'
            WHEN ch.YearlyIncome <  90000 THEN 'Average'
            WHEN ch.YearlyIncome < 116667 THEN 'High'
            WHEN ch.YearlyIncome < 143333 THEN 'Very High'
            ELSE 'Enormous'
        END AS YearlyIncomeDiscretized
) AS band
GROUP BY band.YearlyIncomeDiscretized;
GO

-- Build a YYYYMM integer (year * 100 + month number) for every DimDate row.
SELECT
    CAST(d.CalendarYear AS INTEGER) * 100
        + CAST(d.MonthNumberOfYear AS INTEGER) AS TimeIndex
FROM dbo.DimDate AS d;
GO

-- Monthly Internet sales in "interleaved" form: one row per month and product
-- category, plus one extra row per month labelled 'Total' that aggregates
-- all categories.
WITH SalesByMonth AS
(
    -- One row per sales line, tagged with its YYYYMM index and category.
    SELECT
        CAST(d.CalendarYear AS INTEGER) * 100
            + CAST(d.MonthNumberOfYear AS INTEGER) AS TimeIndex,
        pc.EnglishProductCategoryName             AS CategoryName,
        f.ExtendedAmount,
        f.OrderQuantity
    FROM dbo.FactInternetSales AS f
    INNER JOIN dbo.DimDate AS d
        ON f.OrderDateKey = d.DateKey
    INNER JOIN dbo.DimProduct AS p
        ON f.ProductKey = p.ProductKey
    INNER JOIN dbo.DimProductSubcategory AS psc
        ON p.ProductSubcategoryKey = psc.ProductSubcategoryKey
    INNER JOIN dbo.DimProductCategory AS pc
        ON psc.ProductCategoryKey = pc.ProductCategoryKey
)
SELECT
    s.TimeIndex,
    -- In the month-only grouping set the category is NULL; label it 'Total'.
    COALESCE(s.CategoryName, 'Total') AS Category,
    SUM(s.ExtendedAmount)             AS Amount,
    SUM(s.OrderQuantity)              AS Quantity
FROM SalesByMonth AS s
GROUP BY GROUPING SETS
(
    (s.CategoryName, s.TimeIndex),
    (s.TimeIndex)
);
GO

-- View with monthly Internet sales in "interleaved" form: one row per month
-- and product category, plus one row per month labelled 'Total' that
-- aggregates all categories.
-- Columns: TimeIndex (YYYYMM integer), Category, Amount, Quantity.
CREATE VIEW SalesHistoryInterleaved AS
WITH SalesByMonth AS
(
    -- One row per sales line, tagged with its YYYYMM index and category.
    SELECT
        CAST(d.CalendarYear AS INTEGER) * 100
            + CAST(d.MonthNumberOfYear AS INTEGER) AS TimeIndex,
        pc.EnglishProductCategoryName             AS CategoryName,
        f.ExtendedAmount,
        f.OrderQuantity
    FROM dbo.FactInternetSales AS f
    INNER JOIN dbo.DimDate AS d
        ON f.OrderDateKey = d.DateKey
    INNER JOIN dbo.DimProduct AS p
        ON f.ProductKey = p.ProductKey
    INNER JOIN dbo.DimProductSubcategory AS psc
        ON p.ProductSubcategoryKey = psc.ProductSubcategoryKey
    INNER JOIN dbo.DimProductCategory AS pc
        ON psc.ProductCategoryKey = pc.ProductCategoryKey
)
SELECT
    s.TimeIndex,
    -- In the month-only grouping set the category is NULL; label it 'Total'.
    COALESCE(s.CategoryName, 'Total') AS Category,
    SUM(s.ExtendedAmount)             AS Amount,
    SUM(s.OrderQuantity)              AS Quantity
FROM SalesByMonth AS s
GROUP BY GROUPING SETS
(
    (s.CategoryName, s.TimeIndex),
    (s.TimeIndex)
);
GO

-- Monthly Internet sales pivoted by product category: one row per month with
-- overall totals and separate quantity/amount columns for Bikes, Clothing
-- and Accessories. Newest month first.
WITH SalesByMonth AS
(
    -- One row per sales line, tagged with its YYYYMM index and category.
    SELECT
        CAST(d.CalendarYear AS INTEGER) * 100
            + CAST(d.MonthNumberOfYear AS INTEGER) AS TimeIndex,
        pc.EnglishProductCategoryName             AS CategoryName,
        f.OrderQuantity,
        f.ExtendedAmount
    FROM dbo.FactInternetSales AS f
    INNER JOIN dbo.DimDate AS d
        ON f.OrderDateKey = d.DateKey
    INNER JOIN dbo.DimProduct AS p
        ON f.ProductKey = p.ProductKey
    INNER JOIN dbo.DimProductSubcategory AS psc
        ON p.ProductSubcategoryKey = psc.ProductSubcategoryKey
    INNER JOIN dbo.DimProductCategory AS pc
        ON psc.ProductCategoryKey = pc.ProductCategoryKey
)
SELECT
    s.TimeIndex,
    SUM(s.OrderQuantity)  AS TotalQuantity,
    SUM(s.ExtendedAmount) AS TotalAmount,
    SUM(CASE WHEN s.CategoryName = 'Bikes'       THEN s.OrderQuantity  ELSE 0 END) AS BikesQuantity,
    SUM(CASE WHEN s.CategoryName = 'Bikes'       THEN s.ExtendedAmount ELSE 0 END) AS BikesAmount,
    SUM(CASE WHEN s.CategoryName = 'Clothing'    THEN s.OrderQuantity  ELSE 0 END) AS ClothingQuantity,
    SUM(CASE WHEN s.CategoryName = 'Clothing'    THEN s.ExtendedAmount ELSE 0 END) AS ClothingAmount,
    SUM(CASE WHEN s.CategoryName = 'Accessories' THEN s.OrderQuantity  ELSE 0 END) AS AccessoriesQuantity,
    SUM(CASE WHEN s.CategoryName = 'Accessories' THEN s.ExtendedAmount ELSE 0 END) AS AccessoriesAmount
FROM SalesByMonth AS s
GROUP BY s.TimeIndex
ORDER BY s.TimeIndex DESC;
GO

-- View with monthly Internet sales pivoted by product category: one row per
-- month (TimeIndex = YYYYMM integer) with overall totals and separate
-- quantity/amount columns for Bikes, Clothing and Accessories.
-- The view is created explicitly in the dbo schema because later statements
-- in this script read it as dbo.SalesHistory; without the schema prefix it
-- would be created in the caller's default schema, which may not be dbo.
CREATE VIEW dbo.SalesHistory AS
SELECT
    (CONVERT(INTEGER, d.CalendarYear) * 100)
        + CONVERT(INTEGER, d.MonthNumberOfYear) AS [TimeIndex],
    SUM(f.OrderQuantity)  AS TotalQuantity,
    SUM(f.ExtendedAmount) AS TotalAmount,
    SUM(CASE pc.EnglishProductCategoryName
            WHEN 'Bikes' THEN f.OrderQuantity
            ELSE 0 END) AS [BikesQuantity],
    SUM(CASE pc.EnglishProductCategoryName
            WHEN 'Bikes' THEN f.ExtendedAmount
            ELSE 0 END) AS [BikesAmount],
    SUM(CASE pc.EnglishProductCategoryName
            WHEN 'Clothing' THEN f.OrderQuantity
            ELSE 0 END) AS [ClothingQuantity],
    SUM(CASE pc.EnglishProductCategoryName
            WHEN 'Clothing' THEN f.ExtendedAmount
            ELSE 0 END) AS [ClothingAmount],
    SUM(CASE pc.EnglishProductCategoryName
            WHEN 'Accessories' THEN f.OrderQuantity
            ELSE 0 END) AS [AccessoriesQuantity],
    SUM(CASE pc.EnglishProductCategoryName
            WHEN 'Accessories' THEN f.ExtendedAmount
            ELSE 0 END) AS [AccessoriesAmount]
FROM [dbo].[FactInternetSales] AS f
INNER JOIN [dbo].[DimDate] AS d
    ON f.[OrderDateKey] = d.[DateKey]
INNER JOIN [dbo].[DimProduct] AS p
    ON f.[ProductKey] = p.[ProductKey]
INNER JOIN [dbo].[DimProductSubcategory] AS psc
    ON p.[ProductSubcategoryKey] = psc.[ProductSubcategoryKey]
INNER JOIN [dbo].[DimProductCategory] AS pc
    ON psc.[ProductCategoryKey] = pc.[ProductCategoryKey]
GROUP BY
    (CONVERT(INTEGER, d.CalendarYear) * 100)
        + CONVERT(INTEGER, d.MonthNumberOfYear);
GO

-- Find gaps (runs of missing months) in dbo.SalesHistory.
-- TimeIndex is a YYYYMM integer, so "TimeIndex + 1" after December (e.g.
-- 200512 + 1 = 200513) is never a real month. Working on TimeIndex directly
-- therefore reports a false gap at every year boundary. The months are first
-- mapped to a sequential month number (year * 12 + month - 1), gaps are
-- detected on that sequence, and the gap bounds are mapped back to YYYYMM.
WITH Months AS
(
    SELECT (TimeIndex / 100) * 12 + (TimeIndex % 100) - 1 AS MonthNo
    FROM dbo.SalesHistory
),
GapStarts AS
(
    -- First missing month after an existing month (the last month is skipped
    -- because nothing after it counts as a gap).
    SELECT cur.MonthNo + 1 AS MonthNo
    FROM Months AS cur
    WHERE NOT EXISTS (SELECT * FROM Months AS nxt
                      WHERE nxt.MonthNo = cur.MonthNo + 1)
      AND cur.MonthNo <> (SELECT MAX(MonthNo) FROM Months)
),
GapEnds AS
(
    -- Last missing month before an existing month (the first month is skipped
    -- because nothing before it counts as a gap).
    SELECT cur.MonthNo - 1 AS MonthNo
    FROM Months AS cur
    WHERE NOT EXISTS (SELECT * FROM Months AS prv
                      WHERE prv.MonthNo = cur.MonthNo - 1)
      AND cur.MonthNo <> (SELECT MIN(MonthNo) FROM Months)
)
-- Pair every gap start with the nearest gap end at or after it, then convert
-- the sequential month numbers back to YYYYMM.
SELECT
    (gs.MonthNo / 12) * 100 + (gs.MonthNo % 12) + 1           AS StartOfGap,
    (MIN(ge.MonthNo) / 12) * 100 + (MIN(ge.MonthNo) % 12) + 1 AS EndOfGap
FROM GapStarts AS gs
INNER JOIN GapEnds AS ge
    ON gs.MonthNo <= ge.MonthNo
GROUP BY gs.MonthNo;
GO

-- Find islands (runs of consecutive months) in SalesHistory.
-- Rows of one island share the same difference between their position in
-- the month sequence and their ROW_NUMBER. TimeIndex is a YYYYMM integer, so
-- it jumps by 89 between December and January; using it directly would split
-- every island at each year boundary. The sequential month number
-- (year * 12 + month) is used for the difference instead.
WITH Groups AS
(
    SELECT
        TimeIndex,
        (TimeIndex / 100) * 12 + (TimeIndex % 100)
            - ROW_NUMBER() OVER (ORDER BY TimeIndex) AS Dif
    FROM SalesHistory
)
SELECT
    MIN(TimeIndex) AS StartOfIsland,
    MAX(TimeIndex) AS EndOfIsland
FROM Groups
GROUP BY Dif;
GO